# coding:utf-8
import json
import xlwt
import os
from cnews_group import getCategories
from getAllLeafs_ import getAllLeafs
# Load the category tree; only element 0 of the loaded JSON document is used.
with open(r'D:\文献-分类\minshi.json', mode='r', encoding='UTF-8') as tree_file:
    data = json.load(tree_file)[0]

# Workbook with a single sheet that receives the final label column.
wb = xlwt.Workbook()
ws = wb.add_sheet('Sheet1')

# Counter kept for the disabled traversal that lives in a string literal
# further down; nothing active reads it.
n = 0

# Leaves of the tree as returned by the project helper getAllLeafs.
leafList = getAllLeafs(data)
'''
for d in data:
    n+=1
    Nlabel1=1
    if n<3:
        Nlabel1=d['label_count']
    for i in range(Nlabel1):
        Nlabel2=d['childlist'][i]['label_count']
        if n==3:
            Nlabel2=30
        for j in range(Nlabel2):
            Nlabel3=d['childlist'][i]['childlist'][j]['label_count']
            for k in range(Nlabel3):
                print(n,Nlabel1,i,Nlabel2,j,Nlabel3,k)
                leaf=d['childlist'][i]['childlist'][j]['childlist'][k]['DM']
                if leaf not in leafList:
                    leafList.append(leaf)
'''
# Compare the tree leaves with the files present in the source directory.
# After this section:
#   existY    - leaves that have a matching file stem (in leaf order)
#   leafListY - file stems that match a leaf (in file order)
#   leafList  - leaves with no matching file
#   exist     - file stems with no matching leaf
leafList=sorted(leafList)
print('leafList',leafList)

# A file's stem is everything before its first '.' (or '/'),
# e.g. 'abc.txt' -> 'abc'.
fileList=sorted(os.listdir(r'D:\anyou\minshi\src/'))
exist=[file.replace('.','/').split('/')[0] for file in fileList]
print('全部情况','lenLeaf:',len(leafList),'lenExist:',len(exist))

# Sets make each membership test O(1) instead of a linear list scan.
# NOTE(review): assumes the leaves are hashable (strings); they are compared
# against filename stems, so nothing else could ever match.
leafSet=set(leafList)
existSet=set(exist)

existY=[l for l in leafList if l in existSet]
leafListY=[e for e in exist if e in leafSet]

# Drop the matched entries by filtering rather than calling list.remove()
# once per match: when two files share a stem, that stem appears twice in
# leafListY and the second remove() raised ValueError.
leafList=[l for l in leafList if l not in existSet]
exist=[e for e in exist if e not in leafSet]
print('剔除以后','lenLeaf:',len(leafList),'lenExist:',len(exist))

print('existY',existY)
print('leafListY',leafListY)
print('leafList',leafList)
print('len',len(existY))
print('exist',exist)
print('len',len(leafListY))

# Labels returned by the project helper getCategories.
useLabel = getCategories()

# Keep, in their original order, only the labels that also appear in existY.
uY = [label for label in useLabel if label in existY]
print(len(uY))

# One label per row in the first column of the sheet.
for row, label in enumerate(uY):
    ws.write(row, 0, label)

# Disabled: also dump every entry of existY into the second column.
# for i in range(len(existY)):
#     ws.write(i,1,existY[i])

wb.save(r'D:\anyou\minshi\minshi_labels_9000_whole-1.xls')

